This is the work on machine failure prediction. We predict the state of the machine one hour ahead, based on what we learn from the preceding data set.
#################################### Preparing the data set ################################################
# Installation of packages and loading them ###########################
#### Installation of necessary packages, need to do it only one time
#install.packages("corrplot")
# install.packages("caret")
#install.packages("randomForest")
# install.packages("MASS")
# install.packages("rpart")
# install.packages("e1071")
#install.packages("glmnet")
#install.packages("plotly")
#install.packages("missMDA")
#install.packages("pROC")
#install.packages("DMwR")
#install.packages("gbm")
# install.packages("rattle")
# install.packages("rpart.plot")
# install.packages("RColorBrewer")
# install.packages("party")
# install.packages("partykit")
#### Loading the necessary packages
library(pROC) #for the roc curve methods
library("MASS")
library("rpart")
library("randomForest")
library("e1071")
library("glmnet")
library(plotly)
library(ggplot2)
library(missMDA)
library(caret)
library(DMwR)
library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(party)
library(partykit)
library(caret)
Then we set the working directory and load the data set, which has been resampled at one-hour intervals.
# Switch to the project archive folder so the CSV can be read by bare name.
setwd("/home/moustapha/Energiency Big Data Project/Archive")

# Read the comma-separated file; header = TRUE takes column names from row 1.
data <- read.table("all1h2.csv", header = TRUE, sep = ",")

# Parse column `X` with the "%Y-%m-%d %H:%M:%S" layout into POSIXlt values.
DateTS <- as.POSIXlt(data$X, format = "%Y-%m-%d %H:%M:%S")

# Write the parsed values back, rename the first column to "date", and
# use that column as the row names of the data frame.
data$X <- DateTS
names(data)[1] <- "date"
row.names(data) <- data$date
## date prodh elec
## Min. :2012-12-31 23:00:00 Min. : 0.00 Min. : 0.00
## 1st Qu.:2013-10-20 21:45:00 1st Qu.:38.66 1st Qu.:20.58
## Median :2014-08-09 20:30:00 Median :42.98 Median :21.20
## Mean :2014-08-09 20:57:44 Mean :38.67 Mean :19.93
## 3rd Qu.:2015-05-29 19:15:00 3rd Qu.:45.82 3rd Qu.:21.89
## Max. :2016-03-17 18:00:00 Max. :52.61 Max. :24.09
## NA's :1460 NA's :1284
## gram planstop prod
## Min. : 0 Min. :0.000 Min. : 0.00
## 1st Qu.: 42 1st Qu.:1.000 1st Qu.:37.74
## Median : 45 Median :1.000 Median :42.71
## Mean : 46818 Mean :0.802 Mean :38.33
## 3rd Qu.: 45 3rd Qu.:1.000 3rd Qu.:45.41
## Max. :488110 Max. :1.000 Max. :51.77
## NA's :22273 NA's :17623 NA's :22957